| deseq2 |
1 |
- results/diffexp/neuron-vs-glia.diffexp.tsv
- results/diffexp/neuron-vs-glia.ma-plot.svg
|
|
- r-base =3.4.1
- bioconductor-deseq2 =1.18.1
- bioconductor-annotate =1.56.0
- bioconductor-annotationdbi =1.40.0
- bioconductor-biobase =2.38.0
- bioconductor-biocgenerics =0.24.0
- bioconductor-biocparallel =1.12.0
- bioconductor-delayedarray =0.4.1
- bioconductor-genefilter =1.60.0
- bioconductor-geneplotter =1.56.0
- bioconductor-genomeinfodb =1.14.0
- bioconductor-genomeinfodbdata =1.0.0
- bioconductor-genomicranges =1.30.0
- bioconductor-iranges =2.12.0
- bioconductor-s4vectors =0.16.0
- bioconductor-summarizedexperiment =1.8.0
- bioconductor-tximport =1.6.0
- bioconductor-xvector =0.18.0
- bioconductor-zlibbioc =1.24.0
- r-acepack =1.4.1
- r-backports =1.0.5
- r-base64enc =0.1_3
- r-bh =1.65.0_1
- r-bit =1.1_12
- r-bit64 =0.9_5
- r-bitops =1.0_6
- r-blob =1.1.0
- r-catools =1.17.1
- r-checkmate =1.8.2
- r-cluster =2.0.6
- r-colorspace =1.3_2
- r-dbi =0.6_1
- r-dichromat =2.0_0
- r-digest =0.6.12
- r-evaluate =0.10
- r-foreign =0.8_67
- r-formula =1.2_1
- r-futile.logger =1.4.3
- r-futile.options =1.0.0
- r-gdata =2.17.0
- r-getopt =1.20.0
- r-ggplot2 =2.2.0
- r-gplots =3.0.1
- r-gridextra =2.2.1
- r-gtable =0.2.0
- r-gtools =3.5.0
- r-highr =0.6
- r-hmisc =4.0_3
- r-htmltable =1.9
- r-htmltools =0.3.6
- r-htmlwidgets =0.9
- r-jsonlite =1.4
- r-kernsmooth =2.23_15
- r-knitr =1.16
- r-labeling =0.3
- r-lambda.r =1.1.9
- r-lattice =0.20_34
- r-latticeextra =0.6_28
- r-lazyeval =0.2.0
- r-locfit =1.5_9.1
- r-magrittr =1.5
- r-markdown =0.8
- r-mass =7.3_45
- r-matrix =1.2_7.1
- r-matrixstats =0.52.2
- r-memoise =1.1.0
- r-mime =0.5
- r-munsell =0.4.3
- r-nnet =7.3_12
- r-pkgconfig =2.0.1
- r-plogr =0.1_1
- r-plyr =1.8.4
- r-rcolorbrewer =1.1_2
- r-rcpp =0.12.13
- r-rcpparmadillo =0.7.800.2.0
- r-rcurl =1.95_4.8
- r-reshape2 =1.4.2
- r-rjson =0.2.15
- r-rlang =0.1.2
- r-rpart =4.1_10
- r-rsqlite =2.0
- r-scales =0.5.0
- r-snow =0.4_2
- r-stringi =1.1.2
- r-stringr =1.2.0
- r-survival =2.40_1
- r-tibble =1.3.3
- r-viridis =0.4.0
- r-viridislite =0.2.0
- r-xml =3.98_1.6
- r-xtable =1.8_2
- r-yaml =2.1.14
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 | log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")
library("DESeq2")
parallel <- FALSE
if (snakemake@threads > 1) {
library("BiocParallel")
# setup parallelization
register(MulticoreParam(snakemake@threads))
parallel <- TRUE
}
dds <- readRDS(snakemake@input[[1]])
contrast <- c("condition", snakemake@params[["contrast"]])
res <- results(dds, contrast=contrast, parallel=parallel)
# shrink fold changes for lowly expressed genes
res <- lfcShrink(dds, contrast=contrast, res=res)
# sort by p-value
res <- res[order(res$padj),]
# TODO explore IHW usage
# store results
svg(snakemake@output[["ma_plot"]])
plotMA(res, ylim=c(-2,2))
dev.off()
write.table(as.data.frame(res), file=snakemake@output[["table"]])
|
|
| pca |
1 |
|
|
- r-base =3.4.1
- bioconductor-deseq2 =1.18.1
- bioconductor-annotate =1.56.0
- bioconductor-annotationdbi =1.40.0
- bioconductor-biobase =2.38.0
- bioconductor-biocgenerics =0.24.0
- bioconductor-biocparallel =1.12.0
- bioconductor-delayedarray =0.4.1
- bioconductor-genefilter =1.60.0
- bioconductor-geneplotter =1.56.0
- bioconductor-genomeinfodb =1.14.0
- bioconductor-genomeinfodbdata =1.0.0
- bioconductor-genomicranges =1.30.0
- bioconductor-iranges =2.12.0
- bioconductor-s4vectors =0.16.0
- bioconductor-summarizedexperiment =1.8.0
- bioconductor-tximport =1.6.0
- bioconductor-xvector =0.18.0
- bioconductor-zlibbioc =1.24.0
- r-acepack =1.4.1
- r-backports =1.0.5
- r-base64enc =0.1_3
- r-bh =1.65.0_1
- r-bit =1.1_12
- r-bit64 =0.9_5
- r-bitops =1.0_6
- r-blob =1.1.0
- r-catools =1.17.1
- r-checkmate =1.8.2
- r-cluster =2.0.6
- r-colorspace =1.3_2
- r-dbi =0.6_1
- r-dichromat =2.0_0
- r-digest =0.6.12
- r-evaluate =0.10
- r-foreign =0.8_67
- r-formula =1.2_1
- r-futile.logger =1.4.3
- r-futile.options =1.0.0
- r-gdata =2.17.0
- r-getopt =1.20.0
- r-ggplot2 =2.2.0
- r-gplots =3.0.1
- r-gridextra =2.2.1
- r-gtable =0.2.0
- r-gtools =3.5.0
- r-highr =0.6
- r-hmisc =4.0_3
- r-htmltable =1.9
- r-htmltools =0.3.6
- r-htmlwidgets =0.9
- r-jsonlite =1.4
- r-kernsmooth =2.23_15
- r-knitr =1.16
- r-labeling =0.3
- r-lambda.r =1.1.9
- r-lattice =0.20_34
- r-latticeextra =0.6_28
- r-lazyeval =0.2.0
- r-locfit =1.5_9.1
- r-magrittr =1.5
- r-markdown =0.8
- r-mass =7.3_45
- r-matrix =1.2_7.1
- r-matrixstats =0.52.2
- r-memoise =1.1.0
- r-mime =0.5
- r-munsell =0.4.3
- r-nnet =7.3_12
- r-pkgconfig =2.0.1
- r-plogr =0.1_1
- r-plyr =1.8.4
- r-rcolorbrewer =1.1_2
- r-rcpp =0.12.13
- r-rcpparmadillo =0.7.800.2.0
- r-rcurl =1.95_4.8
- r-reshape2 =1.4.2
- r-rjson =0.2.15
- r-rlang =0.1.2
- r-rpart =4.1_10
- r-rsqlite =2.0
- r-scales =0.5.0
- r-snow =0.4_2
- r-stringi =1.1.2
- r-stringr =1.2.0
- r-survival =2.40_1
- r-tibble =1.3.3
- r-viridis =0.4.0
- r-viridislite =0.2.0
- r-xml =3.98_1.6
- r-xtable =1.8_2
- r-yaml =2.1.14
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14 | log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")
library("DESeq2")
# load deseq2 data
dds <- readRDS(snakemake@input[[1]])
# obtain normalized counts
counts <- rlog(dds, blind=FALSE)
svg(snakemake@output[[1]])
plotPCA(counts, intgroup=snakemake@params[["pca_labels"]])
dev.off()
|
|
| multiqc |
1 |
|
|
- multiqc ==1.2
- networkx <2.0
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 | """Snakemake wrapper for trimming paired-end reads using cutadapt."""
__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "julianderuiter@gmail.com"
__license__ = "MIT"
from os import path
from snakemake.shell import shell
input_dirs = set(path.dirname(fp) for fp in snakemake.input)
output_dir = path.dirname(snakemake.output[0])
output_name = path.basename(snakemake.output[0])
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
shell(
"multiqc"
" {snakemake.params}"
" --force"
" -o {output_dir}"
" -n {output_name}"
" {input_dirs}"
" {log}")
|
|
| deseq2_init |
1 |
|
|
- r-base =3.4.1
- bioconductor-deseq2 =1.18.1
- bioconductor-annotate =1.56.0
- bioconductor-annotationdbi =1.40.0
- bioconductor-biobase =2.38.0
- bioconductor-biocgenerics =0.24.0
- bioconductor-biocparallel =1.12.0
- bioconductor-delayedarray =0.4.1
- bioconductor-genefilter =1.60.0
- bioconductor-geneplotter =1.56.0
- bioconductor-genomeinfodb =1.14.0
- bioconductor-genomeinfodbdata =1.0.0
- bioconductor-genomicranges =1.30.0
- bioconductor-iranges =2.12.0
- bioconductor-s4vectors =0.16.0
- bioconductor-summarizedexperiment =1.8.0
- bioconductor-tximport =1.6.0
- bioconductor-xvector =0.18.0
- bioconductor-zlibbioc =1.24.0
- r-acepack =1.4.1
- r-backports =1.0.5
- r-base64enc =0.1_3
- r-bh =1.65.0_1
- r-bit =1.1_12
- r-bit64 =0.9_5
- r-bitops =1.0_6
- r-blob =1.1.0
- r-catools =1.17.1
- r-checkmate =1.8.2
- r-cluster =2.0.6
- r-colorspace =1.3_2
- r-dbi =0.6_1
- r-dichromat =2.0_0
- r-digest =0.6.12
- r-evaluate =0.10
- r-foreign =0.8_67
- r-formula =1.2_1
- r-futile.logger =1.4.3
- r-futile.options =1.0.0
- r-gdata =2.17.0
- r-getopt =1.20.0
- r-ggplot2 =2.2.0
- r-gplots =3.0.1
- r-gridextra =2.2.1
- r-gtable =0.2.0
- r-gtools =3.5.0
- r-highr =0.6
- r-hmisc =4.0_3
- r-htmltable =1.9
- r-htmltools =0.3.6
- r-htmlwidgets =0.9
- r-jsonlite =1.4
- r-kernsmooth =2.23_15
- r-knitr =1.16
- r-labeling =0.3
- r-lambda.r =1.1.9
- r-lattice =0.20_34
- r-latticeextra =0.6_28
- r-lazyeval =0.2.0
- r-locfit =1.5_9.1
- r-magrittr =1.5
- r-markdown =0.8
- r-mass =7.3_45
- r-matrix =1.2_7.1
- r-matrixstats =0.52.2
- r-memoise =1.1.0
- r-mime =0.5
- r-munsell =0.4.3
- r-nnet =7.3_12
- r-pkgconfig =2.0.1
- r-plogr =0.1_1
- r-plyr =1.8.4
- r-rcolorbrewer =1.1_2
- r-rcpp =0.12.13
- r-rcpparmadillo =0.7.800.2.0
- r-rcurl =1.95_4.8
- r-reshape2 =1.4.2
- r-rjson =0.2.15
- r-rlang =0.1.2
- r-rpart =4.1_10
- r-rsqlite =2.0
- r-scales =0.5.0
- r-snow =0.4_2
- r-stringi =1.1.2
- r-stringr =1.2.0
- r-survival =2.40_1
- r-tibble =1.3.3
- r-viridis =0.4.0
- r-viridislite =0.2.0
- r-xml =3.98_1.6
- r-xtable =1.8_2
- r-yaml =2.1.14
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 | log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")
library("DESeq2")
parallel <- FALSE
if (snakemake@threads > 1) {
library("BiocParallel")
# setup parallelization
register(MulticoreParam(snakemake@threads))
parallel <- TRUE
}
# colData and countData must have the same sample order, but this is ensured
# by the way we create the count matrix -- This not allways work
cts <- read.table(snakemake@input[["counts"]], header=TRUE, row.names="gene", check.names=FALSE)
coldata <- read.table(snakemake@params[["samples"]], header=TRUE, row.names="sample", check.names=FALSE)
#now they are forced to have the same order
coldata$samples <- rownames(coldata)
coldata <- coldata[match(colnames(cts), coldata$samples), ]
dds <- DESeqDataSetFromMatrix(countData=cts,
colData=coldata,
design=~ condition)
# remove uninformative columns
dds <- dds[ rowSums(counts(dds)) > 1, ]
# normalization and preprocessing
dds <- DESeq(dds, parallel=parallel)
saveRDS(dds, file=snakemake@output[[1]])
|
|
| align |
9 |
- star/N1-rep1/Aligned.out.bam
- star/N1-rep1/ReadsPerGene.out.tab
- star/N2-rep1/Aligned.out.bam
- star/N2-rep1/ReadsPerGene.out.tab
- star/N3-rep1/Aligned.out.bam
- star/N3-rep1/ReadsPerGene.out.tab
- star/N4-rep1/Aligned.out.bam
- star/N4-rep1/ReadsPerGene.out.tab
- star/N5-rep1/Aligned.out.bam
- star/N5-rep1/ReadsPerGene.out.tab
- star/G1-rep1/Aligned.out.bam
- star/G1-rep1/ReadsPerGene.out.tab
- star/G2-rep1/Aligned.out.bam
- star/G2-rep1/ReadsPerGene.out.tab
- star/G3-rep1/Aligned.out.bam
- star/G3-rep1/ReadsPerGene.out.tab
- star/G4-rep1/Aligned.out.bam
- star/G4-rep1/ReadsPerGene.out.tab
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
import os
from snakemake.shell import shell
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
sample = [snakemake.input.sample] if isinstance(snakemake.input.sample, str) else snakemake.input.sample
n = len(sample)
assert n == 1 or n == 2, "input->sample must have 1 (single-end) or 2 (paired-end) elements."
if sample[0].endswith(".gz"):
readcmd = "--readFilesCommand zcat"
else:
readcmd = ""
outprefix = os.path.dirname(snakemake.output[0]) + "/"
shell(
"STAR "
"{snakemake.params.extra} "
"--runThreadN {snakemake.threads} "
"--genomeDir {snakemake.params.index} "
"--readFilesIn {snakemake.input.sample} "
"{readcmd} "
"--outSAMtype BAM Unsorted "
"--outFileNamePrefix {outprefix} "
"--outStd Log "
"{log}")
|
|
| rseqc_junction_annotation |
9 |
- qc/rseqc/N1-rep1.junctionanno.junction.bed
- qc/rseqc/N2-rep1.junctionanno.junction.bed
- qc/rseqc/N3-rep1.junctionanno.junction.bed
- qc/rseqc/N4-rep1.junctionanno.junction.bed
- qc/rseqc/N5-rep1.junctionanno.junction.bed
- qc/rseqc/G1-rep1.junctionanno.junction.bed
- qc/rseqc/G2-rep1.junctionanno.junction.bed
- qc/rseqc/G3-rep1.junctionanno.junction.bed
- qc/rseqc/G4-rep1.junctionanno.junction.bed
|
|
|
| junction_annotation.py {params.extra} -i {input.bam} -r {input.bed} -o {params.prefix} > {log[0]} 2>&1
|
|
| rseqc_junction_saturation |
9 |
- qc/rseqc/N1-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/N2-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/N3-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/N4-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/N5-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/G1-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/G2-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/G3-rep1.junctionsat.junctionSaturation_plot.pdf
- qc/rseqc/G4-rep1.junctionsat.junctionSaturation_plot.pdf
|
|
|
| junction_saturation.py {params.extra} -i {input.bam} -r {input.bed} -o {params.prefix} > {log} 2>&1
|
|
| rseqc_infer |
9 |
- qc/rseqc/N1-rep1.infer_experiment.txt
- qc/rseqc/N2-rep1.infer_experiment.txt
- qc/rseqc/N3-rep1.infer_experiment.txt
- qc/rseqc/N4-rep1.infer_experiment.txt
- qc/rseqc/N5-rep1.infer_experiment.txt
- qc/rseqc/G1-rep1.infer_experiment.txt
- qc/rseqc/G2-rep1.infer_experiment.txt
- qc/rseqc/G3-rep1.infer_experiment.txt
- qc/rseqc/G4-rep1.infer_experiment.txt
|
|
|
| infer_experiment.py -r {input.bed} -i {input.bam} > {output} 2> {log}
|
|
| rseqc_stat |
9 |
- qc/rseqc/N1-rep1.stats.txt
- qc/rseqc/N2-rep1.stats.txt
- qc/rseqc/N3-rep1.stats.txt
- qc/rseqc/N4-rep1.stats.txt
- qc/rseqc/N5-rep1.stats.txt
- qc/rseqc/G1-rep1.stats.txt
- qc/rseqc/G2-rep1.stats.txt
- qc/rseqc/G3-rep1.stats.txt
- qc/rseqc/G4-rep1.stats.txt
|
|
|
| bam_stat.py -i {input} > {output} 2> {log}
|
|
| rseqc_innerdis |
9 |
- qc/rseqc/N1-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/N2-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/N3-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/N4-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/N5-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/G1-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/G2-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/G3-rep1.inner_distance_freq.inner_distance.txt
- qc/rseqc/G4-rep1.inner_distance_freq.inner_distance.txt
|
|
|
| inner_distance.py -r {input.bed} -i {input.bam} -o {params.prefix} > {log} 2>&1
|
|
| rseqc_readdis |
9 |
- qc/rseqc/N1-rep1.readdistribution.txt
- qc/rseqc/N2-rep1.readdistribution.txt
- qc/rseqc/N3-rep1.readdistribution.txt
- qc/rseqc/N4-rep1.readdistribution.txt
- qc/rseqc/N5-rep1.readdistribution.txt
- qc/rseqc/G1-rep1.readdistribution.txt
- qc/rseqc/G2-rep1.readdistribution.txt
- qc/rseqc/G3-rep1.readdistribution.txt
- qc/rseqc/G4-rep1.readdistribution.txt
|
|
|
| read_distribution.py -r {input.bed} -i {input.bam} > {output} 2> {log}
|
|
| rseqc_readdup |
9 |
- qc/rseqc/N1-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/N2-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/N3-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/N4-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/N5-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/G1-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/G2-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/G3-rep1.readdup.DupRate_plot.pdf
- qc/rseqc/G4-rep1.readdup.DupRate_plot.pdf
|
|
|
| read_duplication.py -i {input} -o {params.prefix} > {log} 2>&1
|
|
| rseqc_readgc |
9 |
- qc/rseqc/N1-rep1.readgc.GC_plot.pdf
- qc/rseqc/N2-rep1.readgc.GC_plot.pdf
- qc/rseqc/N3-rep1.readgc.GC_plot.pdf
- qc/rseqc/N4-rep1.readgc.GC_plot.pdf
- qc/rseqc/N5-rep1.readgc.GC_plot.pdf
- qc/rseqc/G1-rep1.readgc.GC_plot.pdf
- qc/rseqc/G2-rep1.readgc.GC_plot.pdf
- qc/rseqc/G3-rep1.readgc.GC_plot.pdf
- qc/rseqc/G4-rep1.readgc.GC_plot.pdf
|
|
|
| read_GC.py -i {input} -o {params.prefix} > {log} 2>&1
|
|
| count_matrix |
1 |
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 | import numpy as np
import pandas as pd
def get_column(strandedness):
if pd.isnull(strandedness) or strandedness == "none":
return 1 #non stranded protocol
elif strandedness == "yes":
return 2 #3rd column
elif strandedness == "reverse":
return 3 #4th column, usually for Illumina truseq
else:
raise ValueError(("'strandedness' column should be empty or have the "
"value 'none', 'yes' or 'reverse', instead has the "
"value {}").format(repr(strandedness)))
counts = [pd.read_table(f, index_col=0, usecols=[0, get_column(strandedness)],
header=None, skiprows=4)
for f, strandedness in zip(snakemake.input, snakemake.params.strand)]
for t, sample in zip(counts, snakemake.params.samples):
t.columns = [sample]
matrix = pd.concat(counts, axis=1)
matrix.index.name = "gene"
# collapse technical replicates
matrix = matrix.groupby(matrix.columns, axis=1).sum()
matrix.to_csv(snakemake.output[0], sep="\t")
|
|
| rseqc_gtf2bed |
1 |
- qc/rseqc/annotation.bed
- qc/rseqc/annotation.db
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 | import gffutils
db = gffutils.create_db(snakemake.input[0],
dbfn=snakemake.output.db,
force=True,
keep_order=True,
merge_strategy='merge',
sort_attribute_values=True,
disable_infer_genes=True,
disable_infer_transcripts=True)
with open(snakemake.output.bed, 'w') as outfileobj:
for tx in db.features_of_type('transcript', order_by='start'):
bed = [s.strip() for s in db.bed12(tx).split('\t')]
bed[3] = tx.id
outfileobj.write('{}\n'.format('\t'.join(bed)))
|
|